#### R Script 
### Data Setup

### R 4.0.2
### R Studio Version 1.3.1093

### Used Packages: #####
library(dplyr)

# Setup #####
setwd("F:/Views Competition/Update September 2020")

memory.size(32000)

# Environment
load("F:/Views Competition/Update September 2020/Views September 2020 25 09 2020.RData")


## Load Datasets #####

skeleton <- read.csv("skeleton_cm_global.csv", header=TRUE)

ged <- read.csv("ged_cm.csv", header=TRUE)
acled <- read.csv("acled_cm.csv", header=TRUE)
ces <- read.csv("ces_cm_imp_sklearn_0.csv", header=TRUE)
imf <- read.csv("imfweo_202006_cm_imp_sklearn_0.csv", header=TRUE)  
reign <- read.csv("reign_v2_cm_extrapolated.csv", header=TRUE) 
fvp <- read.csv("fvp_v2_cy_imp_sklearn_0.csv", header=TRUE)

vdem <- read.csv("vdem_v10_cy_imp_sklearn_0_part_1.csv", header=TRUE)
vdem2 <- read.csv("vdem_v10_cy_imp_sklearn_0_part_2.csv", header=TRUE)
vdem3 <- read.csv("vdem_v10_cy_imp_sklearn_0_part_3.csv", header=TRUE)

wdi <- read.csv("wdi_202005_cy_imp_sklearn_0_part_1.csv", header=TRUE)
wdi2 <- read.csv("wdi_202005_cy_imp_sklearn_0_part_2.csv", header=TRUE)




### GED Data #####
## create delta

views.ged <- left_join(skeleton, ged, by = c('month_id','country_id'))

### shift s1
ged[,1:8] -> ged_log

ged_log$month_id + 1 -> ged_log$month_id
colnames(ged_log) <- c("month_id", "country_id", "ged_best_sb_s1", "ged_best_ns_s1", "ged_best_os_s1", "ged_count_sb_s1", "ged_count_ns_s1", "ged_count_os_s1")
views.ged <- left_join(views.ged, ged_log, by = c('month_id','country_id'))

### shift s2
ged[,1:8] -> ged_log2

ged_log2$month_id + 2 -> ged_log2$month_id
colnames(ged_log2) <- c("month_id", "country_id", "ged_best_sb_s2", "ged_best_ns_s2", "ged_best_os_s2", "ged_count_sb_s2", "ged_count_ns_s2", "ged_count_os_s2")
views.ged <- left_join(views.ged, ged_log2, by = c('month_id','country_id'))

### shift s3
ged[,1:8] -> ged_log3

ged_log3$month_id + 3 -> ged_log3$month_id
colnames(ged_log3) <- c("month_id", "country_id", "ged_best_sb_s3", "ged_best_ns_s3", "ged_best_os_s3", "ged_count_sb_s3", "ged_count_ns_s3", "ged_count_os_s3")
views.ged <- left_join(views.ged, ged_log3, by = c('month_id','country_id'))

### shift s4
ged[,1:8] -> ged_log4

ged_log4$month_id + 4 -> ged_log4$month_id
colnames(ged_log4) <- c("month_id", "country_id", "ged_best_sb_s4", "ged_best_ns_s4", "ged_best_os_s4", "ged_count_sb_s4", "ged_count_ns_s4", "ged_count_os_s4")
views.ged <- left_join(views.ged, ged_log4, by = c('month_id','country_id'))

### shift s5
ged[,1:8] -> ged_log5

ged_log5$month_id + 5 -> ged_log5$month_id
colnames(ged_log5) <- c("month_id", "country_id", "ged_best_sb_s5", "ged_best_ns_s5", "ged_best_os_s5", "ged_count_sb_s5", "ged_count_ns_s5", "ged_count_os_s5")
views.ged <- left_join(views.ged, ged_log5, by = c('month_id','country_id'))

### shift s6
ged[,1:8] -> ged_log6

ged_log6$month_id + 6 -> ged_log6$month_id
colnames(ged_log6) <- c("month_id", "country_id", "ged_best_sb_s6", "ged_best_ns_s6", "ged_best_os_s6", "ged_count_sb_s6", "ged_count_ns_s6", "ged_count_os_s6")
views.ged <- left_join(views.ged, ged_log6, by = c('month_id','country_id'))



###### Timeshift for predictions #####

### DELTA s1
ged[,1:3] -> ged_log20
ged_log20$month_id - 1 -> ged_log20$month_id
colnames(ged_log20) <- c("month_id", "country_id", "ged_best_sb_s-1")

views.ged <- left_join(views.ged, ged_log20, by = c('month_id','country_id'))

ged[,1:3] -> ged_log21
ged_log21$month_id - 2 -> ged_log21$month_id
colnames(ged_log21) <- c("month_id", "country_id", "ged_best_sb_s-2")

views.ged <- left_join(views.ged, ged_log21, by = c('month_id','country_id'))

ged[,1:3] -> ged_log22
ged_log22$month_id - 3 -> ged_log22$month_id
colnames(ged_log22) <- c("month_id", "country_id", "ged_best_sb_s-3")

views.ged <- left_join(views.ged, ged_log22, by = c('month_id','country_id'))

ged[,1:3] -> ged_log23
ged_log23$month_id - 4 -> ged_log23$month_id
colnames(ged_log23) <- c("month_id", "country_id", "ged_best_sb_s-4")

views.ged <- left_join(views.ged, ged_log23, by = c('month_id','country_id'))

ged[,1:3] -> ged_log24
ged_log24$month_id - 5 -> ged_log24$month_id
colnames(ged_log24) <- c("month_id", "country_id", "ged_best_sb_s-5")

views.ged <- left_join(views.ged, ged_log24, by = c('month_id','country_id'))

ged[,1:3] -> ged_log25
ged_log25$month_id - 6 -> ged_log25$month_id
colnames(ged_log25) <- c("month_id", "country_id", "ged_best_sb_s-6")

views.ged <- left_join(views.ged, ged_log25, by = c('month_id','country_id'))

ged[,1:3] -> ged_log26
ged_log26$month_id - 7 -> ged_log26$month_id
colnames(ged_log26) <- c("month_id", "country_id", "ged_best_sb_s-7")

views.ged <- left_join(views.ged, ged_log26, by = c('month_id','country_id'))


# delta algorithm
views.ged$ln_ged_best_sb_s1 <- log(views.ged$`ged_best_sb_s-1`+1) - log(views.ged$ged_best_sb+1)
views.ged$ln_ged_best_sb_s2 <- log(views.ged$`ged_best_sb_s-2`+1) - log(views.ged$ged_best_sb+1)

views.ged$ln_ged_best_sb_s3 <- log(views.ged$`ged_best_sb_s-3`+1) - log(views.ged$ged_best_sb+1)
views.ged$ln_ged_best_sb_s4 <- log(views.ged$`ged_best_sb_s-4`+1) - log(views.ged$ged_best_sb+1)

views.ged$ln_ged_best_sb_s5 <- log(views.ged$`ged_best_sb_s-5`+1) - log(views.ged$ged_best_sb+1)
views.ged$ln_ged_best_sb_s6 <- log(views.ged$`ged_best_sb_s-6`+1) - log(views.ged$ged_best_sb+1)

views.ged$ln_ged_best_sb_s7 <- log(views.ged$`ged_best_sb_s-7`+1) - log(views.ged$ged_best_sb+1)

######


## and add them!

write.csv(views.ged, file="views_ged.csv", row.names=FALSE) 

views.ged <- read.csv("views_ged.csv", header=TRUE)
#####

###+###+###+###


## REIGN data ##### not included in the final predictions



views.reign <- left_join(reign, views.ged[,c(1:12, 56:62)], by = c('month_id','country_id'))



### this experiment failed #####
reign -> views.reign.pre1
views.reign.pre1$month_id + 1 ->  views.reign.pre1$month_id
views.reign.2month <- left_join(reign, views.reign.pre1,
                                by = c("month_id", "country_id"),
                                copy = FALSE,
                                suffix = c(".0", ".1"),
                                keep = FALSE,
                                na_matches = c("na", "never")
)


reign -> views.reign.pre2
views.reign.pre2$month_id + 2 ->  views.reign.pre2$month_id
views.reign.3month <- left_join(views.reign.2month, views.reign.pre2,
                                by = c("month_id", "country_id"),
                                copy = FALSE,
                                keep = FALSE,
                                na_matches = c("na", "never")
)


reign -> views.reign.pre3
views.reign.pre3$month_id + 3 ->  views.reign.pre3$month_id
views.reign.4month <- left_join(views.reign.3month, views.reign.pre3,
                                by = c("month_id", "country_id"),
                                copy = FALSE,
                                suffix = c(".2", ".3"),
                                keep = FALSE,
                                na_matches = c("na", "never")
)

views.reign.4month <- left_join(views.reign.4month, views.ged[,c(1:12, 56:62)], by = c('month_id','country_id'))
##### 9 predictors 6 month #####

### select top 9 predictors and use them for 6 month timeshift
views.reign.9V <- select(views.reign, month_id, country_id, reign_tenure_months, reign_couprisk, reign_precip, reign_loss,
                         reign_age, reign_lastelection, reign_irregular, reign_pctile_risk, reign_prev_conflict)


views.reign.9V -> views.reign.pre1
views.reign.pre1$month_id + 1 ->  views.reign.pre1$month_id
views.reign.9V.2m <- left_join(views.reign.9V, views.reign.pre1,
                                by = c("month_id", "country_id"),
                                copy = FALSE,
                                suffix = c(".0", ".1"),
                                keep = FALSE,
                                na_matches = c("na", "never")
)


views.reign.9V -> views.reign.pre2
views.reign.pre2$month_id + 2 ->  views.reign.pre2$month_id
views.reign.9V.3m <- left_join(views.reign.9V.2m, views.reign.pre2,
                               by = c("month_id", "country_id"),
                               copy = FALSE,
                               keep = FALSE,
                               na_matches = c("na", "never")
)


views.reign.9V -> views.reign.pre3
views.reign.pre3$month_id + 3 ->  views.reign.pre3$month_id
views.reign.9V.4m <- left_join(views.reign.9V.3m, views.reign.pre3,
                               by = c("month_id", "country_id"),
                               copy = FALSE,
                               suffix = c(".2", ".3"),
                               keep = FALSE,
                               na_matches = c("na", "never")
)


views.reign.9V -> views.reign.pre4
views.reign.pre4$month_id + 4 ->  views.reign.pre4$month_id
views.reign.9V.5m <- left_join(views.reign.9V.4m, views.reign.pre4,
                               by = c("month_id", "country_id"),
                               copy = FALSE,
                               keep = FALSE,
                               na_matches = c("na", "never")
)


views.reign.9V -> views.reign.pre5
views.reign.pre5$month_id + 5 ->  views.reign.pre5$month_id
views.reign.9V.6m <- left_join(views.reign.9V.5m, views.reign.pre5,
                               by = c("month_id", "country_id"),
                               copy = FALSE,
                               suffix = c(".4", ".5"),
                               keep = FALSE,
                               na_matches = c("na", "never")
)
#####
views.reign.9V.6m <- left_join(views.reign.9V.6m, views.ged[,c(1:12, 56:62)], by = c('month_id','country_id'))
#######


#+#+#+#
write.csv(views.reign, file="views_reign.csv", row.names=FALSE) 
write.csv(views.reign.4month, file="views_reign_4m.csv", row.names=FALSE) 




##### United Frame #####

views <- left_join(skeleton, acled, by = c('month_id','country_id'))
views <- left_join(views, ged, by = c('month_id','country_id'))
views <- left_join(views, ces, by = c('month_id','country_id'))
views <- left_join(views, imf, by = c('month_id','country_id'))
views <- left_join(views, reign, by = c('month_id','country_id'))

views2 <- left_join(views, vdem[,c(1:2, 13:317)], by = c('year','country_id'))
views2 <- left_join(views2, vdem2, by = c('year','country_id'))
views2 <- left_join(views2, vdem3, by = c('year','country_id'))

views3 <- left_join(views2, wdi, by = c('year','country_id'))
views3 <- left_join(views3, wdi2, by = c('year','country_id'))


united.01 <- dplyr::select(views3, year, month_id, country_id, country_name, in_africa, 
                             ged_best_sb, imfweo_bca_ngdpd_tcurrent, imfweo_bca_ngdpd_tmin1, imfweo_bca_ngdpd_tplus2,
                             imfweo_bca_ngdpd_tplus1, imfweo_bca_tplus1, imfweo_bca_tmin1, imfweo_bca_tplus2,
                             imfweo_bca_tcurrent, imfweo_lp_tcurrent,	imfweo_lp_tmin1, imfweo_ggb_ngdp_tmin1,
                             imfweo_ggb_ngdp_tplus2, imfweo_ngdp_rpch_tplus1, imfweo_ngdpdpc_tplus1, imfweo_ngdp_rpch_tcurrent,
                             imfweo_ngdpdpc_tplus2, imfweo_ngdp_d_tcurrent, imfweo_ngdp_rpch_tplus2, imfweo_ngdpdpc_tmin1,
                             imfweo_ngdpdpc_tcurrent,	vdem_e_migdpgro, 
                             wdi_vc_btl_deth, reign_precip, reign_prev_conflict, reign_tenure_months,
                             reign_irregular, reign_lastelection, reign_loss, reign_pctile_risk, 
                             reign_couprisk, ged_count_sb, ged_best_os, ged_count_os, wdi_eg_use_pcap_kg_oe,       
                             ged_best_ns, vdem_v2x_accountability, wdi_nv_srv_totl_zs, wdi_sp_pop_totl,
                             wdi_sl_tlf_totl_fe_zs, wdi_sm_pop_totl_zs, wdi_sm_pop_refg_or, wdi_dt_oda_odat_pc_zs,
                             ged_count_ns, reign_gov_dominant_party, reign_age, vdem_v2xpe_exlpol, wdi_ag_lnd_frst_k2,
                             wdi_st_int_rcpt_xp_zs, vdem_v2x_clpol, vdem_v2x_genpp, ces_confidence_p3_negotiations_tlag3,
                             ces_confidence_p12_violence_tlag3, ces_p3_agreement, ces_confidence_p12_violence_25brd_tlag6, 
                             ces_p12_violence, ces_p12_violence_tlag3, ces_p3_violence_civilians_pr_tlag6, wdi_eg_fec_rnew_zs,  
                             wdi_sp_rur_totl, wdi_ag_lnd_arbl_ha_pc, wdi_ag_lnd_agri_k2, wdi_ag_lnd_agri_zs, wdi_en_atm_co2e_pc,
                             wdi_eg_gdp_puse_ko_pp_kd, wdi_dt_nfl_uncf_cd, wdi_dt_nfl_wfpg_cd, wdi_en_atm_co2e_pp_gd,
                             wdi_fm_lbl_bmny_gd_zs, wdi_fm_ast_doms_cn,
                             wdi_ag_lnd_crel_ha, wdi_fi_res_xgld_cd, wdi_fm_ast_cgov_zg_m3, wdi_ne_trd_gnfs_zs,
                             wdi_ne_con_govt_kd_zg, wdi_ms_mil_xpnd_gd_zs, wdi_ms_mil_totl_tf_zs, wdi_nv_agr_empl_kd,
                             wdi_vc_btl_deth, wdi_sp_pop_tech_rd_p6, wdi_sp_reg_dths_zs, wdi_sp_pop_totl_fe_in,
                             wdi_sp_urb_totl, wdi_sl_emp_smgt_fe_zs, wdi_sh_dth_nmrt, wdi_se_prm_uner,
                             wdi_se_prm_enrl, wdi_ic_frm_femo_zs, wdi_en_atm_co2e_pc, wdi_eg_fec_rnew_zs)

united.01 <- left_join(united.01, views.ged[,c(2:3, 56:62)], by = c('month_id','country_id'))

#####
write.csv(united.01, file="united_01.csv", row.names=FALSE) 
#####

#+#+#+#+#+#+#+#


##### ACLED Data #####

views.acled <- left_join(skeleton, acled, by = c('month_id','country_id'))

### shift s1
acled[,1:6] -> acled_log

acled_log$month_id + 1 -> acled_log$month_id
colnames(acled_log) <- c("month_id", "country_id", "acled_count_pr_s1", "acled_count_sb_s1", "acled_count_ns_s1", "acled_count_os_s1")
views.acled <- left_join(views.acled, acled_log, by = c('month_id','country_id'))

### shift s2
acled[,1:6] -> acled_log2

acled_log2$month_id + 2 -> acled_log2$month_id
colnames(acled_log2) <- c("month_id", "country_id", "acled_count_pr_s2", "acled_count_sb_s2", "acled_count_ns_s2", "acled_count_os_s2")
views.acled <- left_join(views.acled, acled_log2, by = c('month_id','country_id'))

### shift s3
acled[,1:6] -> acled_log3

acled_log3$month_id + 3 -> acled_log3$month_id
colnames(acled_log3) <- c("month_id", "country_id", "acled_count_pr_s3", "acled_count_sb_s3", "acled_count_ns_s3", "acled_count_os_s3")
views.acled <- left_join(views.acled, acled_log3, by = c('month_id','country_id'))

### shift s4
acled[,1:6] -> acled_log4

acled_log4$month_id + 4 -> acled_log4$month_id
colnames(acled_log4) <- c("month_id", "country_id", "acled_count_pr_s4", "acled_count_sb_s4", "acled_count_ns_s4", "acled_count_os_s4")
views.acled <- left_join(views.acled, acled_log4, by = c('month_id','country_id'))

### shift s5
acled[,1:6] -> acled_log5

acled_log5$month_id + 5 -> acled_log5$month_id
colnames(acled_log5) <- c("month_id", "country_id", "acled_count_pr_s5", "acled_count_sb_s5", "acled_count_ns_s5", "acled_count_os_s5")
views.acled <- left_join(views.acled, acled_log5, by = c('month_id','country_id'))

### shift s6
acled[,1:6] -> acled_log6

acled_log6$month_id + 6 -> acled_log6$month_id
colnames(acled_log6) <- c("month_id", "country_id", "acled_count_pr_s6", "acled_count_sb_s6", "acled_count_ns_s6", "acled_count_os_s6")
views.acled <- left_join(views.acled, acled_log6, by = c('month_id','country_id'))


## if GED data not in env,
views.ged <- read.csv("views_ged.csv", header=TRUE)


views.acled <- left_join(views.acled, views.ged[,c(2:3, 7:62)], by = c('month_id','country_id'))

write.csv(views.acled, file="views_acled.csv", row.names=FALSE) 
